Description

Analysis of Corona virus.

Country codes can be found in this link

Imports

In [1]:
import time
time_start_notebook = time.time()
In [2]:
import numpy as np
import pandas as pd
pd.plotting.register_matplotlib_converters() # to plot timeseries
In [3]:
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = 8,8
plt.rcParams.update({'font.size': 16})

plt.style.use('ggplot')
%matplotlib inline
In [4]:
import seaborn as sns
sns.set(color_codes=True)
/Users/poudel/miniconda3/envs/dataSc/lib/python3.7/site-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm
In [5]:
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.subplots import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)

[(x.__name__,x.__version__) for x in [plotly]]
Out[5]:
[('plotly', '4.5.2')]
In [6]:
import plotly.express as px 
In [7]:
import pycountry
import pycountry_convert as pc
import country_converter as coco
In [8]:
from functools import lru_cache
In [9]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
In [10]:
# color pallette
CNF = '#393e46' # confirmed - grey
DTH = '#ff2e63' # death - red
REC = '#21bf73' # recovered - cyan
ACT = '#fe9801' # active case - yellow

Load the data files

In [11]:
!ls ../data/kaggle
covid_19_clean_complete.csv us_covid19_daily.csv        us_states_covid19_daily.csv
In [12]:
home = '../data/kaggle/'
dfc = pd.read_csv(home + 'covid_19_clean_complete.csv')
dfu = pd.read_csv(home + 'us_covid19_daily.csv')
dfus = pd.read_csv(home + 'us_states_covid19_daily.csv')
In [13]:
dfc.shape, dfu.shape, dfus.shape
Out[13]:
((13932, 8), (13, 8), (589, 7))
In [14]:
dfc.head(2).append(dfc.tail(2))
Out[14]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered
0 NaN Thailand 15.0000 101.0000 1/22/20 2 0 0
1 NaN Japan 36.0000 138.0000 1/22/20 2 0 0
13930 Mayotte France -12.8275 45.1662 3/15/20 1 0 0
13931 NaN Uzbekistan 41.3775 64.5853 3/15/20 1 0 0
In [15]:
dfu.head(2).append(dfu.tail(2))
Out[15]:
date states positive negative posNeg pending death total
0 20200304 14 118 748 866 103 NaN 969
1 20200305 24 176 971 1147 197 NaN 1344
11 20200315 51 3173 22548 25721 2242 60.0 27963
12 20200316 56 4019 35840 39859 1691 71.0 41552
In [16]:
dfus.head(2).append(dfus.tail(2))
Out[16]:
date state positive negative pending death total
0 20200316 AK 1.0 143.0 NaN NaN 144.0
1 20200316 AL 28.0 28.0 40.0 0.0 96.0
587 20200304 WA 39.0 NaN NaN NaN 39.0
588 20200304 WI 1.0 19.0 6.0 NaN 26.0

Data Cleaning

In [17]:
dfc['Active'] = dfc['Confirmed'] - dfc['Recovered']
dfc.head(2)
Out[17]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active
0 NaN Thailand 15.0 101.0 1/22/20 2 0 0 2
1 NaN Japan 36.0 138.0 1/22/20 2 0 0 2
In [18]:
# Clean name of China
In [19]:
dfc.nlargest(1,'Confirmed')
Out[19]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active
13829 Hubei China 30.9756 112.2707 3/15/20 67794 3085 54288 13506
In [20]:
dfc['Country/Region'].value_counts()
Out[20]:
US                     2970
China                  1782
Canada                  540
Australia               486
France                  324
                       ... 
Antigua and Barbuda      54
Argentina                54
Mexico                   54
Guyana                   54
Taiwan*                  54
Name: Country/Region, Length: 147, dtype: int64
In [21]:
dfc['Country/Region'] = dfc['Country/Region'].replace('Mainland China', 'China')
In [22]:
# fill nans of states
dfc[['Province/State']] = dfc[['Province/State']].fillna('unknown')

Create new dataframes

In [23]:
dfs =dfc[dfc['Province/State'].str.contains('Grand Princess')|dfc['Province/State'].str.contains('Diamond Princess cruise ship')]


dfs.head(2)
Out[23]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active
104 Grand Princess US 37.6489 -122.6655 1/22/20 0 0 0 0
225 Grand Princess Canada 37.6489 -122.6655 1/22/20 0 0 0 0

Create new column "Country code"

In [24]:
@lru_cache(maxsize=None)
def do_fuzzy_search(country):
    try:
        result = pycountry.countries.search_fuzzy(country)
        return result[0].alpha_2
    except:
        return np.nan

dfc['country_code'] = dfc["Country/Region"]\
  .apply(lambda country: do_fuzzy_search(country))


dfc.head()
Out[24]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code
0 unknown Thailand 15.0000 101.0000 1/22/20 2 0 0 2 TH
1 unknown Japan 36.0000 138.0000 1/22/20 2 0 0 2 JP
2 unknown Singapore 1.2833 103.8333 1/22/20 0 0 0 0 SG
3 unknown Nepal 28.1667 84.2500 1/22/20 0 0 0 0 NP
4 unknown Malaysia 2.5000 112.5000 1/22/20 0 0 0 0 MY
In [25]:
dfc[dfc.country_code.isnull()].shape
Out[25]:
(324, 10)
In [26]:
dfc[dfc.country_code.isnull()].head(2)
Out[26]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code
157 unknown Korea, South 36.0000 128.000 1/22/20 1 0 0 1 NaN
166 Diamond Princess Cruise Ship 35.4437 139.638 1/22/20 0 0 0 0 NaN
In [27]:
dfc.loc[dfc.country_code.isnull(),'Country/Region'].unique()
Out[27]:
array(['Korea, South', 'Cruise Ship', 'Taiwan*', 'Congo (Kinshasa)',
       'occupied Palestinian territory', 'Congo (Brazzaville)'],
      dtype=object)
In [28]:
bad_names = {'Korea, South':'KR',
             'Taiwan*':'TW',
             'Congo (Kinshasa)':'CD',
             'occupied Palestinian territory':'PS',
             'Congo (Brazzaville)':'CD',
             'Taipei and environs':'TW',
             'Iran (Islamic Republic of)':'IR',
            'Channel Islands':'GB',
            }

for k,v in bad_names.items():
    # print(k,v)
    dfc["country_code"] = dfc.apply(lambda row: v
                                    if row['Country/Region'] == k
                                    else row['country_code'],
                                    axis = 1)
In [29]:
dfc.loc[dfc.country_code.isnull(),'Country/Region'].unique()
Out[29]:
array(['Cruise Ship'], dtype=object)

Create new column "continent"

In [30]:
@lru_cache(maxsize=None)
def do_continent_search(alpha2):
    try:
        result = pc.country_alpha2_to_continent_code(alpha2)
        return result
    except Exception:
        return np.nan
In [31]:
dfc['continent'] = dfc["country_code"].apply(lambda x: do_continent_search(x))
In [32]:
# Add vatican city to EU
dfc["continent"] = dfc.apply(lambda row:"EU" if row['country_code'] == 'VA'   else row['continent'],axis = 1)
In [33]:
dfc.head(2)
Out[33]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code continent
0 unknown Thailand 15.0 101.0 1/22/20 2 0 0 2 TH AS
1 unknown Japan 36.0 138.0 1/22/20 2 0 0 2 JP AS

Clean county/region name and use short name

In [34]:
@lru_cache(maxsize=None)
def country(alpha2):
    try:
        result =coco.convert(names=alpha2,
                             to='name_short',
                             not_found = np.nan)
        return result
    except Exception:
        return np.nan
In [35]:
dfc['Country/Region'] = dfc['country_code'].apply(lambda x:country(x))

dfc.head(2)
WARNING:root:nan not found in ISO3
Out[35]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code continent
0 unknown Thailand 15.0 101.0 1/22/20 2 0 0 2 TH AS
1 unknown Japan 36.0 138.0 1/22/20 2 0 0 2 JP AS
In [36]:
dfc[dfc['Country/Region']=='Korea, South']
Out[36]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code continent
In [37]:
dfc[dfc['Country/Region']=='South Korea'].head(2)
Out[37]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code continent
157 unknown South Korea 36.0 128.0 1/22/20 1 0 0 1 KR AS
415 unknown South Korea 36.0 128.0 1/23/20 1 0 0 1 KR AS
In [38]:
dfc.dtypes
Out[38]:
Province/State     object
Country/Region     object
Lat               float64
Long              float64
Date               object
Confirmed           int64
Deaths              int64
Recovered           int64
Active              int64
country_code       object
continent          object
dtype: object

Date time columns

In [39]:
dfc['Date'] = pd.to_datetime(dfc['Date'])
dfc.head(2)
Out[39]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active country_code continent
0 unknown Thailand 15.0 101.0 2020-01-22 2 0 0 2 TH AS
1 unknown Japan 36.0 138.0 2020-01-22 2 0 0 2 JP AS

Data for Visualization

In [40]:
latest = dfc[dfc["Date"] == max(dfc["Date"])].reset_index()
latest = latest.groupby('Country/Region').sum().reset_index()

print(latest.shape)
latest.head(2).append(latest.tail(2))
(142, 8)
Out[40]:
Country/Region index Lat Long Confirmed Deaths Recovered Active
0 Afghanistan 13700 33.0000 65.0000 16 0 0 16
1 Albania 13770 41.1533 20.1683 42 1 0 42
140 Venezuela 13910 6.4238 -66.5897 10 0 0 10
141 Vietnam 13866 16.0000 108.0000 56 0 16 40

Data Visualization using plotly

Barplots for Top 20 Countries

In [41]:
def plotly_top20_countries(df,col,color):
    dfx = df.sort_values(col, ascending=False)\
            .head(20)\
            .sort_values('Confirmed', ascending=True)
    fig = px.bar(dfx,
                 x=col,
                 y="Country/Region",
                 title=f'Total{col} Cases',
                 text=col,
                 orientation='h', 
                 width=700,
                 height=700,
                 range_x = [0, max(latest[col])*1.1])

    fig.update_traces(marker_color=color,
                      opacity=0.8,
                      textposition='outside')
    fig.show()
    
plotly_top20_countries(latest,'Confirmed',CNF)
In [42]:
plotly_top20_countries(latest,'Deaths',DTH)
In [43]:
plotly_top20_countries(latest,'Recovered',REC)
In [44]:
plotly_top20_countries(latest,'Active',ACT)
In [45]:
top20deaths = latest.nlargest(20,'Deaths')
fig = px.scatter(top20deaths, 
                 x='Confirmed',
                 y='Deaths',
                 color='Country/Region',
                 text='Country/Region',
                 log_x=True,
                 log_y=True,
                 height=800,
                 width=600,
                 title='Deaths vs Confirmed')
fig.update_traces(textposition='top center')
fig.show()

Total Numbers for Top 5 Countries

In [46]:
top5 = latest.nlargest(5, 'Confirmed')
top5
Out[46]:
Country/Region index Lat Long Confirmed Deaths Recovered Active
28 China 457049 1083.3367 3684.4197 81003 3203 67017 13986
65 Italy 13690 43.0000 12.0000 24747 1809 2335 22412
61 Iran 13830 32.0000 53.0000 13938 724 4590 9348
118 South Korea 13831 36.0000 128.0000 8162 75 510 7652
119 Spain 13692 40.0000 -4.0000 7798 289 517 7281
In [47]:
def plotly_barplot(df,title):

    trace1 = go.Bar(name='Confirmed',
               x=df["Country/Region"],
               y=df['Confirmed'],
               marker_color = 'rgb(55, 83, 109)')
        
    trace2 = go.Bar(name='Active',
               x=df["Country/Region"],
               y=df['Active'],
               marker_color = 'lightsalmon')
    
    trace3 = go.Bar(name='Revovered',
                    x=df["Country/Region"],
                    y=df['Recovered'],
                    marker_color = 'green' )
    
    trace4 = go.Bar(name='Deaths',
                    x=df["Country/Region"],
                    y=df['Deaths'],
                    marker_color = 'crimson' )
    
    data = [trace1, trace2, trace3, trace4]
    fig = go.Figure(data=data)
    
    fig.update_layout(barmode='group',title_text=title)
    fig.layout.template ='plotly_dark'
    fig.show()
    
    
title = 'Latest Top 5 countries'
plotly_barplot(top5,title)
In [48]:
dfx = latest[latest.Confirmed < 10]
dfx = dfx.nlargest(5,'Confirmed')

dfx
Out[48]:
Country/Region index Lat Long Confirmed Deaths Recovered Active
70 Kazakhstan 13889 48.0196 66.9237 9 0 0 9
82 Martinique 13762 14.6415 -61.0242 9 0 0 9
69 Jordan 13743 31.2400 36.5100 8 0 1 7
92 New Zealand 13722 -40.9006 174.8860 8 0 0 8
22 Cambodia 13683 11.5500 104.9167 7 0 1 6
In [49]:
title = 'Latest Top 5 countries with Confirmed < 10'
plotly_barplot(dfx,title)

Growth of Confirmed Cases for Mulitple Countries

In [50]:
dfx =dfc.groupby('Country/Region').sum().reset_index()
unq =dfx.loc[dfx['Confirmed']>1000, 'Country/Region'].unique()
unq.shape, unq
Out[50]:
((30,), array(['Australia', 'Austria', 'Bahrain', 'Belgium', 'Canada', 'China',
        'Denmark', 'France', 'Germany', 'Greece', 'Iceland', 'Iran',
        'Israel', 'Italy', 'Japan', 'Kuwait', 'Malaysia', 'Netherlands',
        'Norway', 'Qatar', 'Singapore', 'South Korea', 'Spain', 'Sweden',
        'Switzerland', 'Taiwan', 'Thailand', 'United Arab Emirates',
        'United Kingdom', 'United States'], dtype=object))
In [51]:
dfy = dfc.groupby(['Country/Region','Date']).sum().reset_index()
dfy = dfy[dfy['Country/Region'].isin(unq)]
print(dfy.shape)
dfy.head(2)
(1620, 8)
Out[51]:
Country/Region Date Lat Long Confirmed Deaths Recovered Active
432 Australia 2020-01-22 -220.5258 1269.5003 0 0 0 0
433 Australia 2020-01-23 -220.5258 1269.5003 0 0 0 0
In [52]:
gb = dfy.groupby('Country/Region')
In [53]:
def plotly_mulitplots(df,unq,title):
    counter = 0
    fig = make_subplots(rows=unq.reshape(-1,3).shape[0],
                        cols=unq.reshape(-1,3).shape[1],
                        subplot_titles=unq)

    for i in  range(1, unq.reshape(-1,3).shape[0]+1):
        for j in range(1, unq.reshape(-1,3).shape[1]+1):
            grp = gb.get_group(unq[counter])
            fig.add_trace(go.Scatter(x=grp['Date'],
                                     y=grp['Confirmed'],
                                     mode='lines',
                                     name=unq[counter]
                                    ),
                          row=i,
                          col=j)
            counter += 1


    fig.update_layout(height=2000,
                      width=1400,
                      title_text=title,
                      showlegend=False)
    fig.layout.template ='plotly_dark'
    fig.show()
    
title = "Countries with Confirmed Cases > 1_000"
plotly_mulitplots(dfy,unq,title)

Confirmed Cases per Continent

In [54]:
dfx = dfc[dfc["Date"] == max(dfc["Date"])].reset_index()
dfx = dfx.groupby('continent').sum().reset_index()\
         .sort_values('Confirmed',ascending =False)
dfx
Out[54]:
continent index Lat Long Confirmed Deaths Recovered Active
1 AS 993237 2091.8665 6569.0147 106919 4058 72621 34298
2 EU 771734 2433.0157 531.2690 54879 2294 3005 51874
3 NA 1105488 2847.8182 -6707.8357 3908 65 24 3884
5 SA 165689 -151.6889 -773.7467 421 5 1 420
0 AF 373810 127.9912 441.7049 320 8 35 285
4 OC 137247 -261.4264 1444.3863 305 3 23 282
In [55]:
def plotly_continent_plot(df,title):
    
    trace1 = go.Bar(name='Active',
                    x=df["continent"],
                    y=df['Active'],
                    marker_color = 'rgb(55, 83, 109)')

    trace2 = go.Bar(name='Revovered',
                    x=df["continent"],
                    y=df['Recovered'],
                    marker_color='green')

    trace3 = go.Bar(name='Deaths',
                    x=df["continent"],
                    y=df['Deaths'],
                    marker_color='crimson' )

    data = [trace1,trace2,trace3]
    fig = go.Figure(data)
    fig.update_layout(barmode='group',title_text=title)
    fig.layout.template ='plotly_dark'
    fig.show()
    
title = 'Total Confirmed Cases per Continent'
plotly_continent_plot(dfx,title)

Line plot of Growth

In [56]:
tmp = dfc.groupby('Date').sum().reset_index()
fig = go.Figure()

fig.add_trace(go.Scatter(x=tmp['Date'], y=tmp['Confirmed'],
                    mode='lines',
                    name='Confirmed'))
fig.add_trace(go.Scatter(x=tmp['Date'], y=tmp['Recovered'],
                    mode='lines',
                    name='Recovered'))
fig.add_trace(go.Scatter(x=tmp['Date'], y=tmp['Deaths'],
                    mode='lines',
                    name='Deaths'))
fig.update_layout(barmode='stack',title_text ='Line plot fo Growth')
fig.layout.template ='plotly_dark'
fig.show()
In [57]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=tmp['Date'], y=tmp['Active'],
                    mode='lines',
                    name='Active'))
fig.add_trace(go.Scatter(x=tmp['Date'], y=tmp['Recovered'],
                    mode='lines',
                    name='Recovered'))
fig.update_layout(barmode='stack',title_text ='Covid Patterns')
fig.layout.template ='plotly_dark'
fig.show()

Area plot of growth

In [58]:
dfx = dfc.groupby('Date')['Recovered', 'Deaths', 'Active']\
         .sum().reset_index()
dfx.head()
/Users/poudel/miniconda3/envs/tf2/lib/python3.7/site-packages/ipykernel_launcher.py:1: FutureWarning:

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

Out[58]:
Date Recovered Deaths Active
0 2020-01-22 28 17 526
1 2020-01-23 30 18 622
2 2020-01-24 36 26 903
3 2020-01-25 39 42 1393
4 2020-01-26 52 56 2061
In [59]:
dfx = dfc.groupby('Date')\
         .agg({'Recovered':'sum',
               'Deaths':'sum',
               'Active':'sum'})\
         .reset_index()
dfx.head()
Out[59]:
Date Recovered Deaths Active
0 2020-01-22 28 17 526
1 2020-01-23 30 18 622
2 2020-01-24 36 26 903
3 2020-01-25 39 42 1393
4 2020-01-26 52 56 2061
In [60]:
dfx = dfx.melt(id_vars="Date",
               value_vars=['Recovered', 'Deaths', 'Active'],
               var_name='Case',
               value_name='Count')
dfx.head()
Out[60]:
Date Case Count
0 2020-01-22 Recovered 28
1 2020-01-23 Recovered 30
2 2020-01-24 Recovered 36
3 2020-01-25 Recovered 39
4 2020-01-26 Recovered 52
In [61]:
def plotly_area_plot(dfx,title):
    fig = px.area(dfx, x="Date", y="Count", color='Case')
    fig.update_layout(barmode='stack',title_text=title)
    fig.layout.template ='plotly_dark'
    fig.show()
    
title = 'Area plot of Growth of Total Numbers'
plotly_area_plot(dfx,title)  

Treemap Total Cases

In [62]:
tmp = dfc.groupby('Date').agg({'Confirmed':'sum',
                               'Deaths':'sum',
                               'Recovered':'sum',
                               'Active':'sum'}).reset_index()

tmp = tmp[tmp['Date']==max(tmp['Date'])].reset_index(drop=True)
tmp.style.background_gradient(cmap='Pastel1')
Out[62]:
Date Confirmed Deaths Recovered Active
0 2020-03-15 00:00:00 167448 6440 76034 91414
In [63]:
tm = tmp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
tm
Out[63]:
Date variable value
0 2020-03-15 Active 91414
1 2020-03-15 Deaths 6440
2 2020-03-15 Recovered 76034
In [64]:
fig = px.treemap(tm,
                 path=["variable"],
                 values="value",
                 height=400,
                 width=600,
                 color_discrete_sequence=[REC, ACT, DTH])
fig.show()
In [65]:
def treemap_countries(dfx,exclude=0):
    tmp = dfx.sort_values(by='Confirmed', ascending=False)\
             .reset_index(drop=True).iloc[exclude:,:]
    
    tmp2 = dfx.sort_values(by='Deaths', ascending=False)\
              .reset_index(drop=True).iloc[exclude:,:]
    
    fig = px.treemap(tmp, 
                     path=["Country/Region"],
                     values="Confirmed", height=700,
                     title='Number of Confirmed Cases',
                     color_discrete_sequence=px.colors.qualitative.Prism)
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

    fig = px.treemap(tmp2, 
                     path=["Country/Region"],
                     values="Deaths", height=700,
                     title='Number of Deaths reported',
                     color_discrete_sequence=px.colors.qualitative.Prism)
    fig.data[0].textinfo = 'label+text+value'
    fig.show()
    
treemap_countries(latest,exclude=0)
In [66]:
treemap_countries(latest,exclude=7)

Country-wise Growth of COVID

In [67]:
top10 = dfc.groupby('Country/Region')['Confirmed'].sum()\
.nlargest(10).index.tolist()

top10
Out[67]:
['China',
 'Italy',
 'South Korea',
 'Iran',
 'Spain',
 'Germany',
 'France',
 'United States',
 'Japan',
 'Switzerland']
In [68]:
tmp = dfc.groupby(['Date','Country/Region'])\
         .agg({'Confirmed':"sum"})\
         .reset_index()\
         .sort_values('Confirmed',ascending=False)\
         .loc[lambda x: x['Country/Region'].isin(top10)]
tmp.head(2)
Out[68]:
Date Country/Region Confirmed
7554 2020-03-15 China 81003
7412 2020-03-14 China 80977
In [69]:
fig = px.line(tmp,x="Date",
              y="Confirmed",
              color = "Country/Region")
fig.update_layout(title_text='Country-Wise Covid Distribution')
fig.layout.template='plotly_dark'
fig.show()

COVID Analsis per Continent

In [70]:
tmp = dfc.groupby('continent').sum()

tmp[["Confirmed","Deaths","Recovered","Active"]].style.background_gradient(cmap='Reds')
Out[70]:
Confirmed Deaths Recovered Active
continent
AF 1471 30 192 1279
AS 3129715 98838 1260674 1869041
EU 274190 10254 15535 258655
NA 15203 280 248 14955
OC 2191 35 480 1711
SA 1705 19 2 1703
In [71]:
dfx = dfc.groupby(['Date','continent'])\
         .agg({'Confirmed':"sum"})\
         .reset_index()

fig = px.line(dfx,
              x="Date",
              y="Confirmed",
              color = "continent")

title = 'Total Continent-Wise Covid Distribution'
fig.update_layout(title_text=title)
fig.layout.template ='plotly_dark'
fig.show()

Spread of Numbers

In [72]:
latest.head(2)
Out[72]:
Country/Region index Lat Long Confirmed Deaths Recovered Active
0 Afghanistan 13700 33.0000 65.0000 16 0 0 16
1 Albania 13770 41.1533 20.1683 42 1 0 42
In [73]:
spread = dfc[dfc['Confirmed']!=0]\
           .groupby('Date')['Country/Region']\
           .unique()\
           .apply(len)

spread = pd.DataFrame(spread).reset_index()
spread.head()
Out[73]:
Date Country/Region
0 2020-01-22 5
1 2020-01-23 7
2 2020-01-24 8
3 2020-01-25 10
4 2020-01-26 12
In [74]:
fig = px.line(spread,
              x='Date',
              y='Country/Region',
              text='Country/Region',
              title='Number of Countries/Regions to which COVID-19 spread over the time',
             color_discrete_sequence=[CNF,DTH, REC])
fig.update_traces(textposition='top center')
fig.show()

Map Visualizations

In [75]:
tmp = dfc[dfc["Date"] == max(dfc["Date"])].reset_index()
tmp = tmp.groupby('Country/Region').sum().reset_index()

print(tmp.shape)
tmp.head(2)
(142, 8)
Out[75]:
Country/Region index Lat Long Confirmed Deaths Recovered Active
0 Afghanistan 13700 33.0000 65.0000 16 0 0 16
1 Albania 13770 41.1533 20.1683 42 1 0 42
In [76]:
def plotly_mapplot(dfx,color_col,range_color=None,
                  colorscale='Plasma'):
    fig = px.choropleth(tmp,
            locations="Country/Region", 
            color=color_col,
            locationmode='country names',
            hover_name="Country/Region",
            range_color=range_color, 
            title=f'Latest Total {color_col} Cases',
            color_continuous_scale=colorscale
                       )
    fig.update(layout_coloraxis_showscale=True)
    fig.show()
    
plotly_mapplot(tmp,'Confirmed',[1,7_000])
In [77]:
plotly_mapplot(tmp,'Deaths',[1,7],'Earth')
In [78]:
plotly_mapplot(tmp,'Recovered',[1,7],'Portland')

Map visualization using Folium

In [79]:
tmp.head(2)
Out[79]:
Country/Region index Lat Long Confirmed Deaths Recovered Active
0 Afghanistan 13700 33.0000 65.0000 16 0 0 16
1 Albania 13770 41.1533 20.1683 42 1 0 42
In [80]:
import folium


m = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=4, zoom_start=1)

for i in range(0, len(tmp)):
    folium.Circle(
        location=[tmp.iloc[i]['Lat'], tmp.iloc[i]['Long']],
        color='crimson', 
        tooltip = ('<li><bold>Country : '+
                str(tmp.iloc[i]['Country/Region'])+
                   
                '<li><bold>Confirmed : '+
                str(tmp.iloc[i]['Confirmed'])+
                   
                '<li><bold>Deaths : '+
                str(tmp.iloc[i]['Deaths'])+
                   
                '<li><bold>Recovered : '+
                str(tmp.iloc[i]['Recovered'])
                  ),
        radius=int(tmp.iloc[i]['Confirmed'])**1.1).add_to(m)
m
Out[80]:

Plotly Scatter geo plot with animation

In [81]:
tmp = dfc.groupby(['Date', 'Country/Region'])\
                  .agg({'Confirmed':'max',
                        'Deaths':'max',
                        'Recovered':'max'})

tmp = tmp.reset_index()
tmp['size'] = tmp['Confirmed'].pow(0.3)

tmp['Date'] = pd.to_datetime(tmp['Date'])
tmp['Date'] = tmp['Date'].dt.strftime('%m/%d/%Y')

tmp.head()
Out[81]:
Date Country/Region Confirmed Deaths Recovered size
0 01/22/2020 Afghanistan 0 0 0 0.0
1 01/22/2020 Albania 0 0 0 0.0
2 01/22/2020 Algeria 0 0 0 0.0
3 01/22/2020 Andorra 0 0 0 0.0
4 01/22/2020 Antigua and Barbuda 0 0 0 0.0
In [82]:
fig = px.scatter_geo(tmp, locations="Country/Region", locationmode='country names', 
                     color="Confirmed", size='size', hover_name="Country/Region", 
                     range_color= [0, max(tmp['Confirmed'])+2], 
                     projection="natural earth", animation_frame="Date", 
                     title='Spread over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()

Embed External Website Animation

In [83]:
from IPython.display import HTML

HTML('''<div class="flourish-embed flourish-bar-chart-race" data-src="visualisation/1571387"><script src="https://public.flourish.studio/resources/embed.js"></script></div>''')
Out[83]:

Time Taken

In [84]:
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken: {:.0f} hr {:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken: 0 hr 0 min 33 secs